import pandas as pd
import numpy as np
import matplotlib.pyplot as plt###引入库包
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.model_selection import cross_val_score,cross_validate
from statistics import mean
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from collections import defaultdict
from pandas import set_option
from operator import itemgetter
from sklearn.metrics import explained_variance_score,r2_score,median_absolute_error,mean_squared_error,mean_absolute_error
from matplotlib.colors import LogNorm
from scipy.stats import gaussian_kde
from scipy import stats
from sklearn import metrics
from sklearn.metrics import r2_score
import time
import csv
import xgboost as xgb
import codecs
from datetime import datetime

# Load the training table; the first CSV column is used as the row index.
dir_path = 'D:/Data/ningbo/'
data_original = pd.read_csv(dir_path + 'training_dataset.csv', header=0, index_col=0)
# print(data_original)

# Discard every row whose 'True_PBLH' value is 0. A boolean mask is used so the
# result does not depend on the CSV index labels being unique.
data_original = data_original[~data_original['True_PBLH'].isin([0])]
# Discard rows with a missing value in any column. `thresh` is deliberately not
# passed: recent pandas releases raise TypeError when both `how` and `thresh`
# are supplied, even if `thresh` is None.
data_original = data_original.dropna(axis=0, how='any')
# Renumber the remaining rows 0..n-1 and throw the old index away.
data_original = data_original.reset_index(drop=True)
print(data_original)
print(np.min(data_original['True_PBLH'].values), np.max(data_original['True_PBLH'].values))
# exit()

# Split every 'Time' stamp ("YYYY-mm-dd HH:MM:SS") into a calendar-day string
# and an integer hour. Each stamp is parsed exactly once with strptime and both
# pieces are read from the resulting datetime, instead of parsing the day with
# strptime and then picking the hour out of the raw value with a regex.
import re  # kept: this is where the script brings `re` into scope

time_all = data_original['Time'].tolist()
_parsed_times = [datetime.strptime(str(item), "%Y-%m-%d %H:%M:%S") for item in time_all]

# Day as 'YYYY-mm-dd'. Code further down compares these strings with >= / <=,
# which orders correctly because the format is zero-padded year-month-day.
time_date = [stamp.strftime("%Y-%m-%d") for stamp in _parsed_times]

# Hour of day as int, one entry per row of data_original.
hour = [stamp.hour for stamp in _parsed_times]
# The helper columns are inserted at the front of the frame (not appended).
# Days named in the original notes:
#   sunny:   2019-11-10  2019-12-04  2020-04-09
#   cloudy:  2020-04-10  2019-11-17
#   raining: 2019-12-02  2019-12-03

period = 1
# Resulting column order: 'Hour', 'time_date', <original columns>.
data_original.insert(0, 'time_date', time_date)
data_original.insert(0, 'Hour', hour)


def _split_off_day(frame, day):
    """Return (rows of `frame` whose 'time_date' equals `day`, `frame` without them)."""
    on_day = frame['time_date'] == day
    return frame.loc[on_day], frame.drop(frame[on_day].index)


# One full day per weather type is taken out of the pool, in this order. The
# extracted frames are later passed to the split helper as the single-day set.
exclued_data_sunny, data_original = _split_off_day(data_original, '2019-11-10')
exclued_data_cloudy, data_original = _split_off_day(data_original, '2019-11-17')
exclued_data_rainy, data_original = _split_off_day(data_original, '2019-12-02')

# Every column after the two inserted helper columns.
feature_list_original = list(data_original.columns)[2:]

# Each list is a flat sequence of (first_day, last_day) pairs: entries 0-1 form
# the first inclusive date range, entries 2-3 the second, and so on.
# NOTE(review): sunny_date contains the range '2019-12-02'..'2019-12-04', which
# covers 2019-12-03 -- a day the notes near the hold-out code list as raining.
# Confirm whether that pair should be '2019-12-04','2019-12-04'.
sunny_date  =['2019-11-01','2019-11-01','2019-11-03','2019-11-04','2019-11-08','2019-11-11','2019-11-13','2019-11-16','2019-11-18','2019-11-18','2019-12-02','2019-12-04','2019-12-06','2019-12-12','2019-12-15','2019-12-16','2019-12-27','2019-12-27',
              '2020-01-14','2020-01-14','2020-01-19','2020-01-20','2020-01-29','2020-02-01',
              '2020-02-09','2020-02-09','2020-02-16','2020-02-18','2020-02-20','2020-02-20','2020-02-22','2020-02-24','2020-03-05','2020-03-05','2020-03-10','2020-03-10','2020-03-14','2020-03-14','2020-03-18','2020-03-20','2020-03-28','2020-03-28',
              '2020-04-07','2020-04-07','2020-04-09','2020-04-09','2020-04-13','2020-04-13','2020-04-15','2020-04-16']
cloudy_date =['2019-11-02','2019-11-02','2019-11-05','2019-11-07','2019-11-12','2019-11-12','2019-11-17','2019-11-17','2019-11-19','2019-11-23','2019-11-29','2019-11-29','2019-12-13','2019-12-14','2019-12-22','2019-12-22','2019-12-28','2019-12-28',
              '2019-12-31','2020-01-02','2020-01-05','2020-01-08','2020-01-12','2020-01-12',
              '2020-01-17','2020-01-18','2020-01-28','2020-01-28','2020-02-03','2020-02-05','2020-02-08','2020-02-08','2020-02-12','2020-02-14','2020-02-25','2020-02-27','2020-03-11','2020-03-11','2020-03-15','2020-03-15','2020-03-22','2020-03-22',
              '2020-03-24','2020-03-24','2020-04-01','2020-04-01','2020-04-03','2020-04-04','2020-04-08','2020-04-08','2020-04-10','2020-04-11','2020-04-18','2020-04-19','2020-04-22','2020-04-22']
raining_date=['2019-11-24','2019-11-28','2019-11-30','2019-12-01','2019-12-05','2019-12-05','2019-12-17','2019-12-21','2019-12-23','2019-12-26','2019-12-29','2019-12-30','2020-01-03','2020-01-04','2020-01-09','2020-01-11','2020-01-13','2020-01-13',
              '2020-01-15','2020-01-16','2020-01-21','2020-01-27','2020-02-02','2020-02-02','2020-02-06','2020-02-07','2020-02-10','2020-02-11','2020-02-15','2020-02-15','2020-02-19','2020-02-19','2020-02-21','2020-02-21','2020-02-28','2020-03-04',
              '2020-03-06','2020-03-09','2020-03-12','2020-03-13','2020-03-16','2020-03-17','2020-03-21','2020-03-21','2020-03-23','2020-03-23','2020-03-25','2020-03-27','2020-03-29','2020-03-31','2020-04-02','2020-04-02','2020-04-05','2020-04-06',
              '2020-04-12','2020-04-12','2020-04-14','2020-04-14','2020-04-17','2020-04-17','2020-04-20','2020-04-21']


def _rows_in_date_ranges(frame, flat_bounds):
    """Stack the rows of `frame` that fall inside each inclusive date range.

    Args:
        frame: DataFrame with a 'time_date' column of 'YYYY-mm-dd' strings.
        flat_bounds: flat list [first_0, last_0, first_1, last_1, ...].

    Returns:
        A new DataFrame with a fresh 0..n-1 index holding the rows of every
        range, in range order.

    Raises:
        ValueError: if `flat_bounds` has an odd number of entries.
    """
    if len(flat_bounds) % 2:
        raise ValueError('date bounds must come in (first_day, last_day) pairs')
    days = frame['time_date']
    pieces = [
        frame.loc[(days >= first_day) & (days <= last_day)]
        for first_day, last_day in zip(flat_bounds[0::2], flat_bounds[1::2])
    ]
    if not pieces:
        # No ranges requested: empty frame that still has the original columns.
        return frame.iloc[0:0].reset_index(drop=True)
    # One concat at the end instead of growing a frame inside the loop: avoids
    # re-copying all rows on every iteration and avoids concatenating with an
    # empty placeholder DataFrame.
    return pd.concat(pieces, ignore_index=True)


sunny_data_original = _rows_in_date_ranges(data_original, sunny_date)
cloudy_data_original = _rows_in_date_ranges(data_original, cloudy_date)
rainy_data_original = _rows_in_date_ranges(data_original, raining_date)
print(sunny_data_original)
print(cloudy_data_original)
print(rainy_data_original)
# exit()

# Columns pulled out of each frame, in order. 'ERA5_pblh' must stay LAST: the
# split helper below strips the final column from the model inputs and returns
# it separately.
x_training_feature_list=['Hour','10V', '10U', 'TP', 'SP', 'SLHF', 'SNTR', 'SSHF', 'TEE',
                          'FA', 'SNSR', 'SSRD', 'STRD', 'DT2m', 'ST', 'T2m', 'RH', 'SLTD','ERA5_pblh']


def training_test_data_and_single_data_split(different_data_original, different_single_data_original,
                                             test_size=0.2, random_state=None):
    """Build train / test arrays from a pooled frame plus arrays for a held-out frame.

    Both frames must contain every column in `x_training_feature_list` and a
    'True_PBLH' column. The last feature column ('ERA5_pblh') is never part of
    the returned model inputs; it is returned on its own for the test rows and
    for the held-out rows.

    Args:
        different_data_original: pooled frame that is randomly split.
        different_single_data_original: held-out frame, used as-is (no split).
        test_size: fraction of the pooled rows assigned to the test set.
        random_state: forwarded to `train_test_split`; pass an int to make the
            split reproducible. The default (None) keeps the previous
            behaviour of a different split on every run.

    Returns:
        Tuple of eight arrays, in this order:
        x_train, x_test, y_train, y_test, era5_pblh_of_test,
        x_single, y_single, era5_pblh_of_single.
    """
    pooled_x = different_data_original[x_training_feature_list].values
    pooled_y = different_data_original['True_PBLH'].values
    x_train, x_test, y_train, y_test = train_test_split(
        pooled_x, pooled_y, test_size=test_size, random_state=random_state)

    single_x = different_single_data_original[x_training_feature_list].values
    single_y = different_single_data_original['True_PBLH'].values

    # [:, :-1] drops the trailing ERA5 column; [:, -1] is that column alone.
    return (x_train[:, :-1], x_test[:, :-1], y_train, y_test, x_test[:, -1],
            single_x[:, :-1], single_y, single_x[:, -1])
# Split each weather subset. The second argument is the single day that was
# taken out of the pool for that weather type.
(x_train_sunny, x_test_sunny, y_train_sunny, y_test_sunny, y_test_sunny_ers5_pblh,
 x_test_sunny_single, y_test_sunny_single, y_test_sunny_single_era5_pblh) = (
    training_test_data_and_single_data_split(sunny_data_original, exclued_data_sunny))

(x_train_cloudy, x_test_cloudy, y_train_cloudy, y_test_cloudy, y_test_cloudy_ers5_pblh,
 x_test_cloudy_single, y_test_cloudy_single, y_test_cloudy_single_era5_pblh) = (
    training_test_data_and_single_data_split(cloudy_data_original, exclued_data_cloudy))

(x_train_rainy, x_test_rainy, y_train_rainy, y_test_rainy, y_test_rainy_ers5_pblh,
 x_test_rainy_single, y_test_rainy_single, y_test_rainy_single_era5_pblh) = (
    training_test_data_and_single_data_split(rainy_data_original, exclued_data_rainy))

# Run configuration.
weather_situation = 'cloudy'   # which weather subset the code below trains on
model = 'Stacking model'       # used in the output directory and file names
have_MWR = 'without_MWR'       # tag embedded in the output file names
training_time = 'a'            # last component of the output directory

from scipy.optimize import minimize


def mse_loss(weights, *preds_and_true):
    """Mean squared error of a weighted sum of base-model predictions.

    `preds_and_true` is any number of 1-D prediction arrays followed by the
    target array; `weights` holds one coefficient per prediction array.
    """
    preds = np.column_stack(preds_and_true[:-1])
    true_y = preds_and_true[-1]
    return np.mean((true_y - np.dot(preds, weights)) ** 2)


def blend_predictions(weights, *base_predictions):
    """Combine base-model predictions exactly the way `mse_loss` scores them.

    The fitted weights come from an unconstrained optimisation of a plain dot
    product, so they must be applied as a dot product too. `np.average` would
    additionally divide by `weights.sum()`, i.e. evaluate a different
    combination than the one that was optimised.
    """
    return np.dot(np.column_stack(base_predictions), weights)


def Regression_performance(y, y_predict):
    """Return (MAE, MSE, RMSE, R2) of `y_predict` measured against reference `y`."""
    MAE = metrics.mean_absolute_error(y, y_predict)
    MSE = metrics.mean_squared_error(y, y_predict)
    RMSE = np.sqrt(MSE)
    r2 = r2_score(y, y_predict)
    return MAE, MSE, RMSE, r2


# Everything that differed between the three formerly copy-pasted branches.
# 'n_estimators' applies to the random forest and AdaBoost only.
# NOTE(review): 'seleced_date' is not used by any code in this section, and the
# cloudy value differs from the cloudy day that is actually held out earlier in
# this file ('2019-11-17') -- confirm which one is intended.
_WEATHER_RUNS = {
    'sunny': {
        'seleced_date': '2019-11-10',
        'n_estimators': 800,
        'train': (x_train_sunny, y_train_sunny),
        'test': (x_test_sunny, y_test_sunny, y_test_sunny_ers5_pblh),
        'single': (x_test_sunny_single, y_test_sunny_single, y_test_sunny_single_era5_pblh),
    },
    'cloudy': {
        'seleced_date': '2020-04-10',
        'n_estimators': 800,
        'train': (x_train_cloudy, y_train_cloudy),
        'test': (x_test_cloudy, y_test_cloudy, y_test_cloudy_ers5_pblh),
        'single': (x_test_cloudy_single, y_test_cloudy_single, y_test_cloudy_single_era5_pblh),
    },
    'rainy': {
        'seleced_date': '2019-12-02',
        'n_estimators': 600,
        'train': (x_train_rainy, y_train_rainy),
        'test': (x_test_rainy, y_test_rainy, y_test_rainy_ers5_pblh),
        'single': (x_test_rainy_single, y_test_rainy_single, y_test_rainy_single_era5_pblh),
    },
}

if weather_situation not in _WEATHER_RUNS:
    # Previously an unrecognised value silently skipped all training.
    print('Unknown weather_situation:', weather_situation, '- nothing was trained')
else:
    _run = _WEATHER_RUNS[weather_situation]
    seleced_date = _run['seleced_date']
    X_train_base, y_train_base = _run['train']
    x_test_selected, y_test_selected, y_test_selected_era5_pblh = _run['test']
    x_test_selected_single, y_test_selected_single, y_test_selected_single_era5_pblh = _run['single']

    # ---- base models (same hyper-parameters as before) ----
    random_model = RandomForestRegressor(criterion='friedman_mse', n_estimators=_run['n_estimators'],
                                         min_samples_split=2, min_samples_leaf=1, max_depth=30,
                                         bootstrap=True)
    adaboost_base = AdaBoostRegressor(DecisionTreeRegressor(max_depth=25), learning_rate=0.1,
                                      loss='linear', n_estimators=_run['n_estimators'])
    lgt_model = lgb.LGBMRegressor(learning_rate=0.25, n_estimators=700, num_leaves=62, max_depth=25)

    _base_models = (random_model, adaboost_base, lgt_model)
    for _base_model in _base_models:
        _base_model.fit(X_train_base, y_train_base)

    # ---- blend weights ----
    # NOTE(review): the weights are fitted on predictions for the very rows the
    # base models were trained on, which favours whichever model memorises the
    # training set best. Out-of-fold predictions would be a sounder basis;
    # left unchanged here because it alters the training procedure.
    rf_pred, adaboost_pred, lgt_pred = (m.predict(X_train_base) for m in _base_models)
    init_weights = np.array([1 / 3, 1 / 3, 1 / 3])
    result = minimize(mse_loss, init_weights, args=(rf_pred, adaboost_pred, lgt_pred, y_train_base))
    weights = result.x
    print("最佳权重：", weights)

    # ---- predictions on the random test split and on the held-out day ----
    rf_test_pred, adaboost_test_pred, lgt_test_pred = (m.predict(x_test_selected) for m in _base_models)
    ypredict = blend_predictions(weights, rf_test_pred, adaboost_test_pred, lgt_test_pred)

    rf_test_pred_single, adaboost_test_pred_single, lgt_test_pred_single = (
        m.predict(x_test_selected_single) for m in _base_models)
    ypredict_single = blend_predictions(weights, rf_test_pred_single, adaboost_test_pred_single,
                                        lgt_test_pred_single)

    # ---- metrics ----
    MAE_test, MSE_test, RMSE_test, r2_test = Regression_performance(y_test_selected, ypredict)
    print('Test_Mean Absolute Error:', MAE_test, '\n', 'Test_Mean Squared Error:', MSE_test, '\n',
          'Test_Root Mean Squared Error:', RMSE_test, '\n', 'Test_R2:', r2_test)

    MAE_test1, MSE_test1, RMSE_test1, r2_test1 = Regression_performance(y_test_selected_single,
                                                                        ypredict_single)
    print('Test_single_Mean Absolute Error:', MAE_test1, '\n', 'Test_single_Mean Squared Error:', MSE_test1, '\n',
          'Test_single_Root Mean Squared Error:', RMSE_test1, '\n', 'Test_single_R2:', r2_test1)

    # ERA5 baseline on the held-out day. The measured values are the reference
    # and ERA5 is the prediction; R2 is not symmetric, so the order matters.
    MAE_test1_era5, MSE_test1_era5, RMSE_test1_era5, r2_test1_era5 = Regression_performance(
        y_test_selected_single, y_test_selected_single_era5_pblh)
    print('Test_single_Mean Absolute Error_era5:', MAE_test1_era5, '\n', 'Test_single_Mean Squared Error_era5:',
          MSE_test1_era5, '\n',
          'Test_single_Root Mean Squared Error_era5:', RMSE_test1_era5, '\n', 'Test_single_R2_era5:', r2_test1_era5)

    def save_test_results():
        """Write inputs, targets, ERA5 values and predictions of this run to CSV files.

        Files go to 'D:/Data/ningbo/training_results/<model>/<weather>/<training_time>/'
        (created if missing) and carry a '<date>_<HH>_<MM>_<SS>' time stamp.
        """
        stamp = time.strftime('%Y-%m-%d_%H_%M_%S')
        # Model inputs are all feature columns except the trailing 'ERA5_pblh'.
        feature_list = x_training_feature_list[:-1]

        save_file_path = 'D:/Data/ningbo/training_results/' + model + '/' + weather_situation + '/'  + training_time + '/'
        # Without this, to_csv fails after the whole training run if the folder is missing.
        os.makedirs(save_file_path, exist_ok=True)
        name_tail = weather_situation + '_' + have_MWR + '_' + stamp + '.csv'

        # (file-name stem, values, column names). The stems are kept exactly as
        # they always were -- including the missing separators in the last
        # 'era5' stem and in the two train stems -- so existing readers of
        # these files keep working.
        outputs = [
            ('_x_test_dataframe_', x_test_selected, feature_list),
            ('_y_test_dataframe_', y_test_selected, ['BLH_true']),
            ('_y_test_dataframe_era5_', y_test_selected_era5_pblh, ['BLH_era5']),
            ('_ypredict_dataframe_', ypredict, ['BLH_predict']),
            ('_x_test_single_dataframe_', x_test_selected_single, feature_list),
            ('_y_test_single_dataframe_', y_test_selected_single, ['BLH_true']),
            ('_y_test_single_dataframe_era5', y_test_selected_single_era5_pblh, ['BLH_era5']),
            ('_ypredict_single_dataframe_', ypredict_single, ['BLH_predict']),
            ('x_train_dataframe', X_train_base, feature_list),
            ('y_train_dataframe', y_train_base, ['BLH_true']),
        ]
        for stem, values, columns in outputs:
            pd.DataFrame(values, columns=columns).to_csv(
                save_file_path + model + stem + name_tail, sep=',')

    # Called without rebinding the name, so the function stays callable afterwards.
    save_test_results()